R/OACD Functions.R

Defines functions main_effect

Documented in main_effect

library(ggplot2)
library(dplyr)
library(reshape2)
# Main Effects ------------------------------------------------------------

#' Obtain main effect plot for a factor in our design with ggplot
#'
#' @param data
#' @param factr
#' @param response
#'
#' @return data to calculate main effect, and main effect plot
#'
#' @examples data = AEL_dat
#' main_effect(data, factr = B, response = ybar)
#' @export
main_effect <- function(data,factr,response){
  var1 = dplyr::enquo(factr)
  response = dplyr::enquo(response)

  dat <- data %>%
    mutate(!!quo_name(var1) := factor(!!var1)) %>%
    group_by(!!var1) %>%
    summarise(y = mean(!!response),.groups = 'drop')

  plot <-  dat %>%
    ggplot(aes_(x=var1,y=~y,group=1)) +
    theme_minimal()+
    ylab(bquote("Mean of "*.(response)))+
    geom_line(aes(group=1),size=1,colour = "#4292c6")+
    geom_point(size=1.5,colour = "#4292c6")


  return(list(dat = dat,plot=plot))
}




# Interaction Effects -----------------------------------------------------



#' Obtain interaction effects plot between two factors in our design with ggplot
#'
#' @param data
#' @param factr1
#' @param factr2
#' @param response
#'
#' @return data to calculate interaction effects, and interaction effects plot
#' @examples IA_effect(data = AEL_dat, factr1 = A, factr2 = B, response = ybar)
#' @export
IA_effect <- function(data,factr1,factr2,response){
  var1 = dplyr::enquo(factr1)
  var2 = dplyr::enquo(factr2)
  response = enquo(response)

  dat <- data %>%
    mutate(!!quo_name(var1) := factor(!!var1), !!quo_name(var2) := factor(!!var2)) %>%
    group_by(!!var1,!!var2) %>%
    summarise(y = mean(!!response),.groups = 'drop')


  plot <- dat %>% ggplot(aes_(x=var1,y=~y,color=var2)) +
    geom_line(aes(group = {{var2}})) +
    geom_point(size=1.5)+
    theme_bw()+
    ylab(bquote("Mean of"*.(response)))+
    theme(legend.background = element_rect(fill="gray96", size=0.5, linetype="solid"))
  return(list(dat=dat,plot=plot))
}






# OACD Table Cross Validation ---------------------------------------------



#' Compare factor effects from multiple models and visualize
#' the difference in absolute value T statistics to check for model performances
#'
#' @param models a list of models to compare
#' @param model.names a vector of names for the models in format of "model1"
#'
#' @return summary output between models, and ggplot to compare models consistency
#' @export
#'
#' @examples m1 <- lm(ybar~(A+B+C)^2,data =AEL_dat)
#' m2 <- lm(ybar~(A+B)^2,data =AEL_dat)
#'
#' models <- list(m1,m2)
#' models.names <- c("Model1","Model2")
#' summs_models(models,models.names)
summs_models <- function(models,model.names) {


  coefficients = unlist(lapply(models,coefficients))
  coeff.names = unique(names(coefficients))[-1]

  models.data = list()

  for (i in 1:length(models)) {
    models.data[[i]] = as.data.frame(t(abs(summary(models[[i]])$coefficients[-1,3])))

    gg.dat = t(bind_rows(models.data))
    gg.dat[is.na(gg.dat)] = 0
  }
  colnames(gg.dat) = model.names
  Factors = factor(coeff.names,levels = coeff.names)

  summ =  jtools::export_summs(models,
                      error_format = '',
                      coefs  = coeff.names,model.names =model.names,
                      stars = c(`'`=0.1,`*` = 0.05, `**` = 0.01, `***` = 0.001),
                      digits = 4)

  dat1 = melt(gg.dat)
  colnames(dat1) = c("Factors","Model","absolute_T_stat")

  gg.plot = ggplot(dat1) +
    theme_minimal() +
    geom_line(aes(x=Factors,y=absolute_T_stat,group=Factors),col="gray80",size=0.5)+
    geom_point(aes(x = Factors, y = absolute_T_stat, color = Model),size=4)+
    coord_flip()+
    scale_x_discrete(limits = rev(unique(sort(dat1$Factors))))+
    labs(color="",x="",y="|t-statistic|")+
    theme(legend.position = "top")


  p = c() # parameters
  N = c() # Design Runs
  k = c() # Levels in each design
  D_efficiencies = c()

  for (i in 1:length(models)) {
    p[i] = dim( model.matrix( models[[i]] ) )[2]
    N[i] = dim( model.matrix( models[[i]] ))[1]

    if(sum(sapply(models[[i]]$model, class) == "AsIs")!=0){

      k[i]= ncol(models[[i]]$model[,-which(sapply(models[[i]]$model, class) == "AsIs")])-1
    }
    else{

      k[i]=ncol(models[[i]]$model)-1
    }


    D_efficiencies[i]=
      ((det(t(model.matrix(models[[i]]))%*%model.matrix(models[[i]])))^(1/p[i]))/N[i]
  }

  D_eff_table = data.frame(model.names,k,D_efficiencies)


  return(list(table=summ,t_dat=gg.dat,plot=gg.plot,D_effs=D_eff_table))
}







# D-efficiency ------------------------------------------------------------



#' Calculate the D-efficiency between different designs
#'
#' @param mats a single or list of designs
#' @param mat.names names for designs from list of designs (optional)
#'
#' @return D-efficiency of designs, and ggplot to compare designs D-efficiency
#' @export
#'
#' @examples
D_effs <- function(mats,mat.names=NULL) {
  p = c()
  N = c()
  D.effs = c()

  if(is.list(mats)==T && is.null(mat.names)==F){

    for (i in 1:length(mats)) {
      p[i] = dim( mats[[i]] )[2]
      N[i] = dim( mats[[i]] )[1]

      D.effs[i]=
        ((det(t(mats[[i]])%*% mats[[i]]))^(1/p[i]))/N[i]
    }

    deffs.dat = data.frame(mat.names,D.effs)

    deffs.plot = ggplot(deffs.dat,
                        aes(x=mat.names, y=D.effs, group=mat.names))+
      geom_point(aes(color=mat.names))+
      geom_line(aes(color=mat.names))+
      theme_minimal()+
      labs(color="")+
      theme(legend.position = "top")

    return(list(D.effs = deffs.dat, plot = deffs.plot))

  }
  else if (is.list(mats)==T && is.null(mat.names)==T){

    for (i in 1:length(mats)) {
      p[i] = dim( mats[[i]] )[2]
      N[i] = dim( mats[[i]] )[1]

      D.effs[i]=
        ((det(t(mats[[i]])%*% mats[[i]]))^(1/p[i]))/N[i]
    }
    return(D.effs)
  }
  else{
    p = dim(mats)[2]
    N = dim(mats)[1]
    D.effs = ((det(t(mats)%*% mats))^(1/p))/N
    return(D.effs)
  }
}





# D-efficiency for Multiple Models
#
# @param models a list of models
# @param model.names a vector of names for the models in format of "model1"
#
# @return a datafame with the D-efficiencies of different designs from their respective model
# @export
#
# @examples
#D_effs_models <- function(models,model.names) {
#  p = c() # parameters
#  N = c() # Design Runs
#  k = c() # Levels in each design
#  D.efficiencies = c()
#
#  for (i in 1:length(models)) {
#    p[i] = dim( model.matrix( models[[i]] ) )[2]
#    N[i] = dim( model.matrix( models[[i]] ))[1]
#
#   if(sum(sapply(models[[i]]$model, class) == "AsIs")!=0){
#
#      k[i]= ncol(models[[i]]$model[,-which(sapply(models[[i]]$model, class) == "AsIs")])-1
#    }
#    else{
#
#     k[i]=ncol(models[[i]]$model)-1
#    }
#
#
#   D.efficiencies[i]=
#      ((det(t(model.matrix(models[[i]]))%*%model.matrix(models[[i]])))^(1/p[i]))/N[i]
#  }
#
#  D.eff.table = data.frame(model.names,k,D.efficiencies)
#
#  return(D.eff.table)
#}




#' @title Second Order Model Design Matrix
#'
#' @description Obtain second order design matrix from a specified model. Optionally, be
#' able to extract specific terms from the matrix (i.e linear terms, quadratic terms, or bilinear terms)
#' @param model Input a second order model
#' @param terms specify if you only want to extract certain terms from the second order design matrix
#'
#' @return A design matrix from initial second order model
#' @export
#'
#' @examples model2 <- lm(ybar ~ (A+B)^2 + I(A^2)+I(B^2), data= AEL_dat)
#' second_order_mat(model2)
#' second_order_mat(model2, terms = "linear")
#' second_order_mat(model2, terms = "quadratic")
#'
second_order_mat <- function(model,terms="full") {

  k = ncol(model$model[,-which(sapply(model$model, class) == "AsIs")])-1
  b = length(which(attr(model$terms,"order")==2)) # bilinear terms
  q = ncol(model$model[,which(sapply(model$model, class) == "AsIs")])   # quad terms in model
  p = dim(model.matrix(model))[2] # parameters in model


  I = attr(model.matrix(model),"dimnames")[[2]][1]
  L = attr(model.matrix(model),"dimnames")[[2]][2:(k+1)]
  Q = attr(model.matrix(model),"dimnames")[[2]][(k+2):(k+(q+1))]
  B = attr(model.matrix(model),"dimnames")[[2]][(k+(q+2)):p]


  M = model.matrix(model)
  order = c(I,Q,L,B)


  if (terms=="full"){
    return(M[,order])
  }
  else if (terms =="linear"){
    return(M[,L])
  }
  else if (terms == "bilinear"){
    return(M[,B])
  }
  else if (terms == "quadratic"){
    return(M[,Q])
  }
  else {
    stop("available terms: full, linear, bilinear, quadratic")
  }
}










#' Lenth’s method: testing effect significance for experiments without variance estimates
#'
#' @param mod A linear model
#' @param alpha specify the significance level. Default is alpha=0.05
#'
#' @return PSE,ME,SME using Lenth's method. Also returns a plot visualizing any factor which is significant using the obtained
#' values of ME, and SME.
#' @export
#'
#' @examples m1 <- lm(ybar ~ (A+B+C+D)^2,data=AEL_dat)
#' Lenth_method(m1)
#' Lenth_method(m1,alpha=0.01)
Lenth_method <- function(mod,alpha=0.05){
  if (inherits(mod, "lm")) {
    i <- pmatch("(Intercept)", names(coef(mod)))
    if (!is.na(i))
      obj <- 2 * coef(mod)[-pmatch("(Intercept)", names(coef(mod)))]
  }
  b <- obj
  m <- length(b)
  d <- m/3
  s0 <- 1.5 * median(abs(b))
  cj <- as.numeric(b[abs(b) < 2.5 * s0])
  PSE <- 1.5 * median(abs(cj))

  ME <- qt(1 - alpha/2, d) * PSE
  gamma <- (1 + (1 - alpha)^(1/m))/2
  SME <- qt(gamma, d) * PSE

  results <- tibble(alpha,PSE,ME,SME)

  dat <- tibble("coeff"= factor(names(coef(mod))[-1],levels = names(coef(mod))[-1]),
                "estimates" = coef(mod)[-1]) %>%
    mutate(lower_ME = estimates-ME,
           upper_ME = estimates+ME,
           lower_SME = estimates-SME,
           upper_SME = estimates+SME)

  lenth_plot <-  ggplot(dat, aes(x=coeff, y=estimates)) +
    geom_segment( aes(x=coeff, xend=coeff, y=0, yend=estimates), color="grey") +
    geom_point( color="#5fad9a", size=4) +
    theme_classic() +
    geom_hline(yintercept=ME, linetype='dashed', col = 'red',size=1.05)+
    annotate("text",x=-Inf,y=ME,hjust=-0.4,vjust=-0.5,label="ME",fontface="italic",size=3)+
    geom_hline(yintercept=-ME, linetype='dashed', col = 'red',size=1.05)+
    annotate("text",x=-Inf,y=-ME,hjust=-0.4,vjust=-0.5,label="ME",fontface="italic",size=3)+
    geom_hline(yintercept=SME, linetype='dotdash', col = 'blue',size=1.05)+
    annotate("text",x=-Inf,y=SME,hjust=-0.4,vjust=-0.5,label="SME",fontface="italic",size=3)+
    geom_hline(yintercept=-SME, linetype='dotdash', col = 'blue',size=1.05)+
    annotate("text",x=-Inf,y=-SME,hjust=-0.4,vjust=-0.5,label="SME",fontface="italic",size=3)+
    geom_hline(yintercept=0, linetype='dotted', col = 'black')+
    theme(
      panel.border = element_blank(),
      axis.ticks.x = element_blank()) +
    xlab("") +
    ylab("effects")
  return(list(results = results,margins_dat = dat,plot=lenth_plot))
}










#' Half-Normal Effects Plots
#'
#' @param obj object of class lm
#' @param alpha significance level for the Lenth method. Default is 0.05
#' @param signif_label If TRUE, only the significant factors according to the Lenth method
#' (significance level given by alpha) are labeled
#'
#' @return A dataframe with the absolute effects and half-normal qunatiles.
#' A ggplot version of halfnormal plot for factorial effects is returned
#' @export
#'
#' @examples m1 <- lm(lns2 ~ (A+B+C+D)^4,data=AEL_dat)
#' halfnormal(m1)
#' halfnormal(m1,alpha=0.1)
#' halfnormal(m1,alpha=0.2,signif_label=TRUE)
halfnormal <- function(obj,alpha=0.05,signif_label=FALSE){
  if (inherits(obj, "lm")) {
    i <- pmatch("(Intercept)", names(coef(obj)))
    if (!is.na(i))
      effects <- coef(obj)[-pmatch("(Intercept)", names(coef(obj)))]
    obj <- 2 * effects
  }
  b <- obj
  m <- length(b)
  d <- m/3
  s0 <- 1.5 * median(abs(b))
  cj <- as.numeric(b[abs(b) < 2.5 * s0])
  PSE <- 1.5 * median(abs(cj))
  ME <- qt(1 - alpha/2, d) * PSE

  effs <- sort(abs(obj))
  names <- names(effs)
  r <- c(1:m)
  zscore<-c(rep(0,m))
  for (i in 1:m) {
    zscore[i]<-qnorm( ( ( r[i]-.5)/m+1)/2 )
    if(signif_label){
      logc<-(abs(effs[i])<= ME)
      if (logc) {names[i]<-" "}}
  }

  dat <- tibble("effects"=names,
                "absolute_effects"=effs,
                "half_normal_quantiles"=zscore
  )


  plot <- ggplot(dat,aes(x= absolute_effects,
                         y = half_normal_quantiles,
                         label=effects)) +
    geom_point(color = "#1b9e77", size = 2)+
    geom_text(hjust = 0, nudge_x = 0.01)+
    theme_classic()+
    labs(x="absolute effects",y="half-normal quantiles")

  return(list(dat=dat,plt=plot))
}
toledo60/OACD documentation built on Feb. 15, 2021, 2:25 a.m.